* Session setup: empty the workspace, turn off output paging, and issue the
* old-style memory request.
clear all
set more off
set mem 200m

******************** merge data sets ********************

* The master file is c_ls, ordered on folio ls. The master is re-sorted on
* that key after every merge because the old match-merge syntax used here
* expects sorted data.
* NOTE(review): the using files are assumed to be sorted on the merge key
* already -- confirm.
use c_ls, clear
sort folio ls

* c_portad is matched on folio only.
merge folio using c_portad
tabulate _merge
rename _merge merge_portad
sort folio ls

* The remaining files are matched on folio ls, one after another. Each
* _merge indicator is tabulated and kept under the name merge_<file suffix>.
foreach src in ed mg tb cr es gh {
    merge folio ls using p_`src'
    tabulate _merge
    rename _merge merge_`src'
    sort folio ls
}

* String person identifier: zero-padded 8-digit folio followed by the
* zero-padded 2-digit ls.
gen str8 var1 = string(folio, "%08.0f")
gen str2 var2 = string(ls, "%02.0f")
gen pid_link = var1 + var2
sort pid_link

* Retain a row only if it matched in p_ed or in p_tb (merge code 3), then
* discard those two indicators.
drop if merge_ed!=3 & merge_tb!=3
drop merge_ed merge_tb

******************** rename and recode variables *******************

********************
*** demographics ***
********************

* Location identifiers copied under analysis names, plus stratum indicators
* that are 1/0 when estrato is recorded and missing otherwise.
gen state=edo
gen muni=mpio
gen urban=inlist(estrato,1,2) if estrato!=.
gen rural=inlist(estrato,3,4) if estrato!=.
gen locid=id_loc

* Person characteristics taken from the ls items.
gen age=ls02_2
gen age2=age*age
gen sex=ls04
gen marst=ls10
gen lives_in_hh=cond(ls09==1,1,0) if ls09!=.

* 0/1 indicators; a missing source value leaves the indicator missing.
gen byte male=cond(sex==1,1,0) if sex!=.
gen byte female=cond(sex==3,1,0) if sex!=.
gen byte married=cond(marst==5,1,0) if marst!=.

* hhsize = number of rows sharing a folio in the data currently in memory.
gen byte ones=1
bysort folio: egen hhsize=count(ones)

***************** restrict data to male adults (no restriction is applied in this file) **********************

*****************
*** education ***
*****************

* Builds three years-of-schooling measures from a (level, grade, graduate)
* coding: yearsch (ed items, uses the graduation answer), yearsch_roster
* (ls roster items) and yearsch_check (ed items, but with the same
* fractional imputation as the roster version).
* NOTE(review): the meaning of the level codes 0-10 and of the yes/no codes
* is not visible in this file -- confirm against the survey codebook.

* literate: 1 when ed02==1, 0 for any other non-missing ed02.
gen literate=(ed02==1) if ed02!=.

* Roster items. A missing grade is filled with 3 for level 3 and with 2 for
* levels 4 and 6. This fill happens before the 98/8 recodes further down.
gen attend_roster=ls14
gen grade_roster=ls15_1 
replace grade_roster=3 if grade_roster==. & attend_roster==3
replace grade_roster=2 if grade_roster==. & (attend_roster==4 | attend_roster==6)

* Items from the ed file.
gen ever_attend=ed04
gen attend=ed05
gen grade=ed06_1
gen graduate=ed07

* ever_attend==3 forces attend to 1, one of the levels mapped to 0 years below.
* Codes 98 (level/grade) and 8 (grade/graduate) are turned into missing.
replace attend=1 if ever_attend==3
recode attend attend_roster grade grade_roster (98=.)
recode grade grade_roster graduate (8=.)

* Fill-ins for the ed items, applied after the recodes: same grade defaults
* as for the roster, and graduate defaults to 2 for levels 5 and 7-10.
replace grade=3 if grade==. & attend==3
replace grade=2 if grade==. & (attend==4 | attend==6)
replace graduate=2 if graduate==. & (attend==5 | attend==7 | attend==8 | attend==9 | attend==10)

* yearsch: levels 0-2 -> 0; level 3 -> grade capped at 6; level 4 -> 6+grade
* capped at 9; level 6 -> 9+grade capped at 12; levels 5, 7/8, 9 and 10 get
* a lower value when graduate is 2 or 3 and a higher value when graduate==1.
* Later replaces override earlier ones, so the statement order matters.
gen int yearsch=.
replace yearsch=0 if attend==0 | attend==1 | attend==2
replace yearsch=0+grade if attend==3
replace yearsch=6 if grade>6 & grade!=. & attend==3
replace yearsch=6+grade if attend==4 
replace yearsch=9 if grade>3 & grade!=. & attend==4 
replace yearsch=7 if (graduate==2 | graduate==3) & attend==5 
replace yearsch=9 if graduate==1 & attend==5 
replace yearsch=9+grade if attend==6
replace yearsch=12 if grade>3 & grade!=. & attend==6
replace yearsch=10 if (graduate==2 | graduate==3) & (attend==7 | attend==8)
replace yearsch=12 if graduate==1 & (attend==7 | attend==8)
replace yearsch=14 if (graduate==2 | graduate==3) & attend==9 
replace yearsch=16 if graduate==1 & attend==9
replace yearsch=17 if (graduate==2 | graduate==3) & attend==10 
replace yearsch=18 if graduate==1 & attend==10

* yearsch_roster: same grade-based rules for levels 0-4 and 6. For levels 5
* and 7-10 no graduation item is used; instead a rounded base + span*share
* value is assigned.
* NOTE(review): the shares (.7253, .4118, .7469, .4595, .6627) look like
* completion rates, and the bases differ from the non-graduate values used
* for yearsch (e.g. 6 here vs 7 there for level 5) -- confirm both.
gen int yearsch_roster=.
replace yearsch_roster=0 if attend_roster==0 | attend_roster==1 | attend_roster==2
replace yearsch_roster=0+grade_roster if attend_roster==3
replace yearsch_roster=6 if grade_roster>6 & grade_roster!=. & attend_roster==3
replace yearsch_roster=6+grade_roster if attend_roster==4 
replace yearsch_roster=9 if grade_roster>3 & grade_roster!=. & attend_roster==4 
replace yearsch_roster=round(6+((9-7)*.7253)) if attend_roster==5 
replace yearsch_roster=9+grade_roster if attend_roster==6
replace yearsch_roster=12 if grade_roster>3 & grade_roster!=. & attend_roster==6
replace yearsch_roster=round(9+((12-10)*0.4118)) if attend_roster==7 
replace yearsch_roster=round(9+((12-10)*0.7469)) if attend_roster==8 
replace yearsch_roster=round(12+((16-12)*0.4595)) if attend_roster==9 
replace yearsch_roster=round(16+((18-16)*0.6627)) if attend_roster==10

* yearsch_check: the roster formulas applied to the ed items (attend, grade),
* i.e. yearsch recomputed without the graduation answer.
gen int yearsch_check=.
replace yearsch_check=0 if attend==0 | attend==1 | attend==2
replace yearsch_check=0+grade if attend==3
replace yearsch_check=6 if grade>6 & grade!=. & attend==3
replace yearsch_check=6+grade if attend==4 
replace yearsch_check=9 if grade>3 & grade!=. & attend==4 
replace yearsch_check=round(6+((9-7)*.7253)) if attend==5 
replace yearsch_check=9+grade if attend==6
replace yearsch_check=12 if grade>3 & grade!=. & attend==6
replace yearsch_check=round(9+((12-10)*0.4118)) if attend==7 
replace yearsch_check=round(9+((12-10)*0.7469)) if attend==8 
replace yearsch_check=round(12+((16-12)*0.4595)) if attend==9 
replace yearsch_check=round(16+((18-16)*0.6627)) if attend==10

**********************
*** health outcomes***
**********************

* For each rating (health from es01, health_rel from es16) store the raw
* value as an int and derive good<name> = 1 when the rating is 1 or 2,
* 0 for any other non-missing rating.
local src_health     es01
local src_health_rel es16
foreach h in health health_rel {
    gen int `h'=`src_`h''
    gen good`h'=inlist(`h',1,2) if `h'!=.
}

* smoke: 1 when gh06==1, 0 for any other non-missing answer.
gen byte smoke=cond(gh06==1,1,0) if gh06!=.


**********************************
*** labor market participation ***
**********************************

* Work indicators. Each one is 1 when any listed item equals 1, 0 when none
* does but at least one listed item is non-missing, and missing otherwise.
* NOTE(review): the wording of items tb02_1-tb07 is not visible here --
* confirm against the questionnaire.

* Roster flag.
gen byte worked_roster=(ls12==1) if ls12!=.

gen empstat_main=tb02_1 if tb02_1!=.
gen worked_main=(tb02_1==1) if tb02_1!=.
gen worked_wk=(tb02_1==1 | tb03==1 | tb04==1) if tb02_1!=. | tb03!=. | tb04!=.
gen worked_yr=(tb02_1==1 | tb03==1 | tb04==1 | tb05==1 | tb07==1) if tb02_1!=. | tb03!=. | tb04!=. | tb05!=. | tb07!=.
* worked_ever now also counts tb07==1. tb07 was already part of the
* non-missing qualifier, and leaving it out of the expression allowed
* worked_yr==1 together with worked_ever==0.
gen worked_ever=(tb02_1==1 | tb03==1 | tb04==1 | tb05==1 | tb07==1 | tb06==1) if tb02_1!=. | tb03!=. | tb04!=. | tb05!=. | tb07!=. | tb06!=.
gen worked=worked_wk

* Job characteristics copied from the tb items.
gen occ=tb23_25p_cmo
gen ind=tb23_25p_scian
gen hourswk=tb26p_2
gen uhrswk=tb27p_2
gen classwk=tb31p

* Weeks worked: the reported number when tb28p_1==1, 52 when tb28p_1==2;
* anything above 52 (which also matches missing) is set to missing.
gen numweeks=tb28p_2 if tb28p_1==1
replace numweeks=52 if tb28p_1==2
replace numweeks=. if numweeks>52
* Annual hours = weeks worked x usual weekly hours.
gen hoursyr=numweeks*uhrswk

****************
*** earnings ***
****************

* Roster earnings (ls13_2).
gen earnings_roster=ls13_2
*** 13 outlier observations
* Values above 900000 are blanked. The non-missing guard uses the full
* variable name: the earlier spelling "earnings" only worked through
* variable abbreviation and would fail with varabbrev off or become
* ambiguous if another earnings* variable existed at this point.
replace earnings_roster=. if earnings_roster>900000 & earnings_roster!=.

*** monthly reports ***

* Single-figure monthly income, plus the sum of the nine itemised components.
* rowtotal() counts missing components as 0, so the sum is reset to missing
* when all nine components are missing.
gen income_m=tb34a_2
local m_items tb34aa_2 tb34ab_2 tb34ac_2 tb34ad_2 tb34ae_2 tb34af_2 tb34ag_2 tb34ah_2 tb34ai_2
egen incometot_m=rowtotal(`m_items')
egen incometot_m_miss=rowmiss(`m_items')
replace incometot_m=. if incometot_m_miss==9

gen income_m_profits=tb36p_2

* Monthly earnings = income + itemised income + profits; missing only when
* all three parts are missing.
egen earnings_m=rowtotal(income_m incometot_m income_m_profits)
egen earnings_m_miss=rowmiss(income_m incometot_m income_m_profits)
replace earnings_m=. if earnings_m_miss==3


*** yearly reports ***

gen income_yr=tb35a_2
*** 2 outlier observations
replace income_yr=. if tb35a_2>1000000
* Same construction as the monthly block, with thirteen itemised components.
local yr_items tb35aa_2 tb35ab_2 tb35ac_2 tb35ad_2 tb35ae_2 tb35af_2 tb35ag_2 tb35ah_2 tb35ai_2 tb35aj_2 tb35ak_2 tb35al_2 tb35am_2
egen incometot_yr=rowtotal(`yr_items')
egen incometot_yr_miss=rowmiss(`yr_items')
replace incometot_yr=. if incometot_yr_miss==13

gen income_yr_profits=tb37p_2

egen earnings_yr=rowtotal(income_yr incometot_yr income_yr_profits)
egen earnings_yr_miss=rowmiss(income_yr incometot_yr income_yr_profits)
replace earnings_yr=. if earnings_yr_miss==3


*** calculate log earnings ***

* Natural logs of the earnings totals as they stand at this point in the file.
gen ln_earnings_roster=ln(earnings_roster)
gen ln_earnings_m=ln(earnings_m)
gen ln_earnings_yr=ln(earnings_yr)


***************
*** 2nd job ***
***************

*** 2nd job hours and weeks ***
* Weeks worked: the reported number when tb28s_1==1, 52 when tb28s_1==2.
* Values above 52 are set to missing, matching the cap this file applies to
* numweeks and numweeks_last; without it invalid week counts would inflate
* hoursyr_2nd.
gen uhrswk_2nd=tb27s_2
gen numweeks_2nd=tb28s_2 if tb28s_1==1
replace numweeks_2nd=52 if tb28s_1==2
replace numweeks_2nd=. if numweeks_2nd>52
gen hoursyr_2nd=numweeks_2nd*uhrswk_2nd

*** earnings ***
* Monthly 2nd-job earnings = income + profits; rowtotal() counts a missing
* part as 0, so the total is reset to missing when both parts are missing.
gen income_m_2nd=tb34b_2
gen income_m_profits_2nd=tb36s_2
egen earnings_m_2nd=rowtotal(income_m_2nd income_m_profits_2nd)
egen earnings_m_miss_2nd=rowmiss(income_m_2nd income_m_profits_2nd)
replace earnings_m_2nd=. if earnings_m_miss_2nd==2

* Yearly 2nd-job earnings, built the same way.
gen income_yr_2nd=tb35b_2
gen income_yr_profits_2nd=tb37s_2
egen earnings_yr_2nd=rowtotal(income_yr_2nd income_yr_profits_2nd)
egen earnings_yr_miss_2nd=rowmiss(income_yr_2nd income_yr_profits_2nd)
replace earnings_yr_2nd=. if earnings_yr_miss_2nd==2


**************************************************
*** add 2nd job earnings and hours to main job ***
**************************************************

* For each measure, replace the main-job variable with main + 2nd job.
* rowtotal() counts a missing part as 0, so the sum is reset to missing only
* when both parts are missing. The temporary <name>_ variable takes over the
* original name; <name>_miss_ is left in memory.
foreach v in earnings_m earnings_yr hoursyr {
    egen `v'_=rowtotal(`v' `v'_2nd)
    egen `v'_miss_=rowmiss(`v' `v'_2nd)
    replace `v'_=. if `v'_miss_==2
    drop `v'
    rename `v'_ `v'
}

* The log earnings variables were computed from the main-job totals before
* this step; refresh them so that ln_earnings_m and ln_earnings_yr are the
* logs of the combined earnings_m and earnings_yr.
replace ln_earnings_m=ln(earnings_m)
replace ln_earnings_yr=ln(earnings_yr)


****************
*** last job ***
****************

* Month and year of the last job; the year is overwritten with 2002 when
* tb07==1.
gen lastworked_m=tb08_2
gen lastworked_yr=tb09_2
replace lastworked_yr=2002 if tb07==1
* worked_last is 1 for people who ever worked but are not working now,
* unless the last-job year is before 1992; it is missing for everyone else.
gen worked_last=1 if worked_ever==1 & worked==0 & !(lastworked_yr<1992)

* Characteristics of the last job.
gen occ_last=tb14_16_cmo
gen ind_last=tb14_16_scian
gen classwk_last=tb17
gen uhrswk_last=tb12_2
* Weeks: the reported number when tb13_1==1, 52 when tb13_1==2, and missing
* when above 52.
gen numweeks_last=tb13_2 if tb13_1==1
replace numweeks_last=52 if tb13_1==2
replace numweeks_last=. if numweeks_last>52

*** monthly reports ***
* Single figure plus the sum of seven itemised components (missing when all
* seven are missing); both are then multiplied by 12 to put them on a yearly
* scale.
local m_parts tb20a_2 tb20b_2 tb20c_2 tb20d_2 tb20e_2 tb20f_2 tb20g_2
gen income_m_last=tb20_2
egen incometot_m_last=rowtotal(`m_parts')
egen incometot_m_miss_last=rowmiss(`m_parts')
replace incometot_m_last=. if incometot_m_miss_last==7
replace income_m_last=income_m_last*12
replace incometot_m_last=incometot_m_last*12

*** yearly reports ***
* Single figure plus the sum of six itemised components (missing when all
* six are missing).
local yr_parts tb21h_2 tb21i_2 tb21j_2 tb21k_2 tb21l_2 tb21m_2
gen income_yr_last=tb21_2
egen incometot_yr_last=rowtotal(`yr_parts')
egen incometot_yr_miss_last=rowmiss(`yr_parts')
replace incometot_yr_last=. if incometot_yr_miss_last==6

*** profits reports ***
* NOTE(review): this reads tb21_2, the same item already used for
* income_yr_last above, so that amount enters the sum below once as-is and
* once multiplied by 12. It looks like a copy/paste slip -- confirm which tb
* item holds last-job profits.
gen income_m_profits_last=tb21_2
replace income_m_profits_last=income_m_profits_last*12

*** sum earnings ***
* Yearly total over the five components (missing only when all five are
* missing); the monthly figure is the yearly total divided by 12.
local all_parts income_yr_last incometot_yr_last income_m_last incometot_m_last income_m_profits_last
egen earnings_yr_last=rowtotal(`all_parts')
egen earnings_yr_miss_last=rowmiss(`all_parts')
replace earnings_yr_last=. if earnings_yr_miss_last==5
gen earnings_m_last=earnings_yr_last/12

*** calculate log earnings ***
gen ln_earnings_yr_last=ln(earnings_yr_last)
gen ln_earnings_m_last=ln(earnings_m_last)


**************
*** credit ***
**************

* Four 0/1 indicators, each 1 when its cr item equals 1 and missing when the
* item is missing. The two lists are paired by position.
local flag_names tanda buycredit canborrow saved
local flag_items cr04  cr06      cr08      cr27
forvalues k=1/4 {
    local flag : word `k' of `flag_names'
    local item : word `k' of `flag_items'
    gen byte `flag'=(`item'==1) if `item'!=.
}

* savings: 0 for non-savers, overwritten with cr28_2 wherever that is
* reported; amounts above 10000000 are set to missing.
gen byte savings=0 if saved==0
replace savings=cr28_2 if cr28_2!=.
replace savings=. if savings>10000000


**********************
*** past migration ***
**********************

* moved_mg: 1 when mg06==1, 0 for any other non-missing answer; moved is a
* plain copy of it.
gen byte moved_mg=cond(mg06==1,1,0) if mg06!=.
gen moved=moved_mg

* visitUS: 1 when either string item equals "UNITED STATES", otherwise 0
* (never missing, because there is no if qualifier).
gen byte visitUS=inlist("UNITED STATES", mg09p_2, mg04p_2)


***********************
****** save data ******
***********************

* One list drives both -keep- and -renvars-, so every retained variable
* other than the folio ls key receives the _proxy suffix. Previously the
* two lists were typed out separately and "health" was missing from the
* rename list, which left it saved without the suffix.
* -renvars- is a user-written command and must be installed.
local outvars literate ever_attend attend grade graduate yearsch yearsch_check ///
    empstat_main worked_main worked_wk worked_yr worked_ever worked ///
    occ ind hourswk uhrswk classwk numweeks ///
    income_m incometot_m incometot_m_miss income_m_profits ///
    earnings_m earnings_m_miss ln_earnings_m ///
    income_yr incometot_yr incometot_yr_miss income_yr_profits ///
    earnings_yr earnings_yr_miss ln_earnings_yr ///
    lastworked_m lastworked_yr worked_last occ_last ind_last classwk_last ///
    uhrswk_last numweeks_last ///
    income_m_last incometot_m_last incometot_m_miss_last ///
    income_yr_last incometot_yr_last incometot_yr_miss_last ///
    income_m_profits_last earnings_yr_last earnings_yr_miss_last ///
    earnings_m_last ln_earnings_yr_last ln_earnings_m_last ///
    moved visitUS tanda canborrow buycredit saved savings hoursyr ///
    health health_rel goodhealth goodhealth_rel smoke

keep folio ls `outvars'
renvars `outvars', postfix(_proxy)
compress
sort folio ls

save mxfls02_proxy, replace
